Description of the simulations

Here we simulate from the true model, with the following parameters.

  • 2 unknown factors in W
  • V intercept
  • X intercept
  • Genewise dispersion

To have realistic values for the parameters, we estimated the values starting from the allen dataset, randomly selecting 100 cells.

For each simulated data, we fit a ZINB model with all the possible combinations (16) of the following parameters:

  • Dimension of W: 1 through 4
  • V intercept: yes, no
  • Dispersion: common, genewise

Results

Explore the results for one simulated matrix

load("./datasets/sim_allen5.rda")
load("./datasets/sim_allen5_fitted.rda")
load("./datasets/sim_allen5_first.rda")
true_W <- simModel@W
dtrue <- as.matrix(dist(true_W))
biotrue <- as.numeric(factor(bio))

plot(true_W, pch=19, col=biotrue, main="True W", xlab="W1", ylab="W2")

plot(fit[[1]]@W, rep(0, NROW(true_W)), pch=19, col=biotrue, main="K=1", xlab="W1", ylab="")

plot(fit[[2]]@W, pch=19, col=biotrue, main="K=2", xlab="W1", ylab="W2")

pairs(fit[[3]]@W, pch=19, col=biotrue, main="K=3")

pairs(fit[[4]]@W, pch=19, col=biotrue, main="K=4")

Estimates

Bias

load("./datasets/sim_allen5_bias.rda")
K <- 1:4

ggplot(bias[bias$param == 'beta_mu', ], aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'beta_mu', ], aes(x = V, y = bias)) + 
  geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ K) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'beta_pi', ], aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'beta_pi', ], aes(x = V, y = bias)) + 
  geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ K) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'gamma_mu', ], aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('gamma_mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'gamma_mu' & bias$V == 'V', ],
       aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('gamma_mu') + facet_grid( ~ disp) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'gamma_pi', ], aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('gamma_pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'gamma_pi' & bias$V == 'V', ],
       aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('gamma_pi') + facet_grid( ~ disp) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'theta', ], aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('theta') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'theta', ],
       aes(x = disp, y = bias)) + 
  geom_boxplot() + ggtitle('theta') + facet_grid(V ~ K) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'walpha_mu', ], aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('walpha_mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'walpha_mu', ],
       aes(x = interaction(disp, K, V), y = bias)) + 
  geom_boxplot() + ggtitle('walpha_mu') + xlab('') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'walpha_pi', ], aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('walpha_pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'walpha_pi', ],
       aes(x = interaction(disp, K, V), y = bias)) + 
  geom_boxplot() + ggtitle('walpha_pi') + xlab('') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'mu', ], aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(bias[bias$param == 'pi', ], aes(x = K, y = bias)) + 
  geom_boxplot() + ggtitle('pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

Variance

load("./datasets/sim_allen5_variance.rda")
K <- 1:4

ggplot(variance[variance$param == 'beta_mu', ], aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'beta_mu', ], aes(x = V, y = variance)) + 
  geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ K) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'beta_pi', ], aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'beta_pi', ], aes(x = V, y = variance)) + 
  geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ K) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'gamma_mu', ], aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('gamma_mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'gamma_mu' & variance$V == 'V', ],
       aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('gamma_mu') + facet_grid( ~ disp) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'gamma_pi', ], aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('gamma_pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'gamma_pi' & variance$V == 'V', ],
       aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('gamma_pi') + facet_grid( ~ disp) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'theta', ], aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('theta') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'theta', ],
       aes(x = disp, y = variance)) + 
  geom_boxplot() + ggtitle('theta') + facet_grid(V ~ K) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'walpha_mu', ], aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('walpha_mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'walpha_mu', ],
       aes(x = interaction(disp, K, V), y = variance)) + 
  geom_boxplot() + ggtitle('walpha_mu') + xlab('') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'walpha_pi', ], aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('walpha_pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'walpha_pi', ],
       aes(x = interaction(disp, K, V), y = variance)) + 
  geom_boxplot() + ggtitle('walpha_pi') + xlab('') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'mu', ], aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(variance[variance$param == 'pi', ], aes(x = K, y = variance)) + 
  geom_boxplot() + ggtitle('pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

MSE

mse = bias
colnames(mse)[1] = c('mse')
mse$mse = bias$bias^2 + variance$variance
ggplot(mse[mse$param == 'beta_mu', ], aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'beta_mu', ], aes(x = V, y = mse)) + 
  geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ K) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'beta_pi', ], aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'beta_pi', ], aes(x = V, y = mse)) + 
  geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ K) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'gamma_mu', ], aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('gamma_mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'gamma_mu' & mse$V == 'V', ],
       aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('gamma_mu') + facet_grid( ~ disp) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'gamma_pi', ], aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('gamma_pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'gamma_pi' & mse$V == 'V', ],
       aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('gamma_pi') + facet_grid( ~ disp) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'theta', ], aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('theta') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'theta', ],
       aes(x = disp, y = mse)) + 
  geom_boxplot() + ggtitle('theta') + facet_grid(V ~ K) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'walpha_mu', ], aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('walpha_mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'walpha_mu', ],
       aes(x = interaction(disp, K, V), y = mse)) + 
  geom_boxplot() + ggtitle('walpha_mu') + xlab('') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'walpha_pi', ], aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('walpha_pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'walpha_pi', ],
       aes(x = interaction(disp, K, V), y = mse)) + 
  geom_boxplot() + ggtitle('walpha_pi') + xlab('') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'mu', ], aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('mu') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

ggplot(mse[mse$param == 'pi', ], aes(x = K, y = mse)) + 
  geom_boxplot() + ggtitle('pi') + facet_grid(disp ~ V) + 
  geom_hline(yintercept = 0, col = 'red')

Impact on dimensionality reduction

load("./datasets/sim_allen5_dist.rda")
corr5 <- lapply(6:10, function(i) rowMeans(sapply(res, function(x) x[[i]])))
corr5 = do.call(cbind, corr5)
corr5 = data.frame(corr5,stringsAsFactors = F)
colnames(corr5) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
corr5$pzero = .5
boxplot(corr5[,1:5],border=c(1, 2, 1, 1), xlab="method", ylab="Correlation",
        las=2, 
        main="Correlation between true and estimated sample distances in W\npzero = 0.5")

load("./datasets/sim_allen25_dist.rda")
corr25 <- lapply(6:10, function(i) rowMeans(sapply(res, function(x) x[[i]])))
corr25 = do.call(cbind, corr25)
corr25 = data.frame(corr25,stringsAsFactors = F)
colnames(corr25) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
corr25$pzero = .25
boxplot(corr25[,1:5], main="Correlation between true and estimated sample distances in W\npzero = 0.25", border=c(1, 2, 1, 1), xlab="method", ylab="Correlation", las=2)

corr = rbind(corr5, corr25)
corr = melt(corr, id.vars = 'pzero')
ggplot(corr, aes(x = variable, y= value, col = variable)) + geom_boxplot() +
  facet_grid(~ pzero) + ggtitle('Correlation between true and estimated sample distances in W') + ylab('Correlation') + xlab('Method')

ggplot(corr, aes(x = factor(pzero), y= value, col = factor(pzero))) +
  geom_boxplot() + facet_grid(~ variable) + ggtitle('Correlation between true and estimated sample distances in W') + ylab('Correlation') + xlab('Method')

Silhouette width

load("./datasets/sim_allen5_dist.rda")
sil5 <- lapply(11:15, function(i) rowMeans(sapply(res, function(x) x[[i]])))
sil5 = do.call(cbind, sil5)
sil5 = data.frame(sil5,stringsAsFactors = F)
colnames(sil5) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
sil5$pzero = .5
boxplot(sil5[,1:5],border=c(1, 2, 1, 1), xlab="", ylab="Silhouette width",
        las=2, 
        main="Silhouette width of true labels\npzero = 0.5")

load("./datasets/sim_allen25_dist.rda")
sil25 <- lapply(11:15, function(i) rowMeans(sapply(res, function(x) x[[i]])))
sil25 = do.call(cbind, sil25)
sil25 = data.frame(sil25,stringsAsFactors = F)
colnames(sil25) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
sil25$pzero = .25
boxplot(sil25[,1:5], main="Silhouette width of true labels\npzero = 0.25", border=c(1, 2, 1, 1), xlab="", ylab="Silhouette width", las=2)

sil = rbind(sil25, sil5)
sil = melt(sil, id.vars = 'pzero')
ggplot(sil, aes(x = variable, y= value, col = variable)) + geom_boxplot() +
  facet_grid(~ pzero) + ggtitle('Silhouette width of true labels') + ylab('Silhouette width') + xlab('')

ggplot(sil, aes(x = factor(pzero), y= value, col = factor(pzero))) +
  geom_boxplot() + facet_grid(~ variable) + ggtitle('Silhouette width of true labels') + ylab('Silhouette width') + xlab('')

load('./datasets/sim_allen25.rda')
load('./datasets/sim_allen25_fitted.rda')
fit = fittedSim[[2]][[1]][[2]][[1]]
fq <- betweenLaneNormalization(t(simData[[1]]$counts), which="full")
pca <- prcomp(t(log1p(fq)))
par(mfrow=c(1,2))
plot(pca$x, col = biotrue, main = 'PCA\nfZero = 0.25')
plot(fit@W, col = biotrue, main = 'Estimated W, k = 2\nfZero = 0.25', 
     xlab = 'W1', ylab = 'W2')

load('./datasets/sim_allen5.rda')
load('./datasets/sim_allen5_fitted.rda')
fit = fittedSim[[2]][[1]][[2]][[1]]
fq <- betweenLaneNormalization(t(simData[[1]]$counts), which="full")
pca <- prcomp(t(log1p(fq)))
par(mfrow=c(1,2))
plot(pca$x, col = biotrue, main = 'PCA\nfZero = 0.5')
plot(fit@W, col = biotrue, main = 'Estimated W, k = 2\nfZero = 0.5', 
     xlab = 'W1', ylab = 'W2')

Model fit

Let’s look at model fit when chosen parameters are correct (V, genewise dispersion, K=2).

Zero Fraction 50%

computeExp <- function(zinbModel, model = 'zinb'){
  if (model == 'zinb'){
    (1 - t(getPi(zinbModel))) * t(getMu(zinbModel))
  }else{
    t(getMu(zinbModel))
  }
}

computeVar <- function(zinbModel, model = 'zinb'){
  mu = t(getMu(zinbModel))
  pi = t(getPi(zinbModel))
  phi = exp(-getZeta(zinbModel))
  if (model == 'zinb'){
    (1 - pi) * mu * (1 + mu*(phi + pi))
  }else{
    mu * (1 + mu*phi)
  }
}

computeP0 <- function(zinbModel, model = 'zinb'){
  mu = t(getMu(zinbModel))
  pi = t(getPi(zinbModel))
  phi = exp(-getZeta(zinbModel))
  if (model == 'zinb'){
    pi + (1 - pi) * (1 + phi * mu) ^ (-1/phi)
  }else{
    (1 + phi * mu) ^ (-1/phi)
  }
}

plotMD <- function(x, y, xlim = c(0,10), ylim = c(-5, 5),
                   main = 'ZINB: MD-plot estimated vs. observed mean count, log scale'){
  mm = .5*(x + y)
  dd = x - y
  smoothScatter(mm, dd, xlim = xlim, ylim = ylim, xlab = 'Mean', ylab = 'Diff', main = main)
  abline(h = 0, col = 'gray')
  fit = loess(dd ~ mm)
  xpred = seq(0, 10, .1)
  pred = predict(fit, xpred)
  lines(xpred, pred, col = 'red', type = 'l', lwd=2)
}

load('./datasets/sim_allen5_fitted.rda')
zz = fittedSim[[2]][[1]][[2]][[1]]
zinbEY = log(rowMeans(computeExp(zz, model = 'zinb')))
nbEY = log(rowMeans(computeExp(zz, model = 'nb')))
logAveCount = log(rowMeans(originalCounts))
prop0 = rowMeans(originalCounts == 0)
zinbPY0 = rowMeans(computeP0(zz, model = 'zinb'))
nbPY0 = rowMeans(computeP0(zz, model = 'nb'))

plotMD(logAveCount, zinbEY, xlim = c(2, 12), ylim = c(-15, 5),
       main = 'ZINB: MD-plot estimated vs. observed mean count, log scale')

plotMD(logAveCount, zinbEY, xlim = c(3, 10), ylim = c(-2, 2),
       main = 'ZINB: MD-plot estimated vs. observed mean count,\n log scale, zoomed')

plotMD(logAveCount, nbEY, xlim = c(0, 60), ylim = c(-110, 2),
       main = 'NB: MD-plot estimated vs. observed mean count, log scale')
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'

plotMD(logAveCount, nbEY, xlim = c(3, 10), ylim = c(-5, 5),
       main = 'NB: MD-plot estimated vs. observed mean count,\n log scale, zoomed')

MD-plot estimated vs. observed mean count, log scale, zoomed

par(mfrow=c(1,2))
plotMD(logAveCount, zinbEY, xlim = c(3, 10), ylim = c(-5, 5),
       main = 'ZINB')
plotMD(logAveCount, nbEY, xlim = c(3, 10), ylim = c(-5, 5),
       main = 'NB')

MD-plot estimated vs. observed zero probability

par(mfrow=c(1,2))
plotMD(prop0, zinbPY0, xlim = c(0, 1), ylim = c(-1, 1),
       main = 'ZINB')
plotMD(prop0, nbPY0, xlim = c(0, 1), ylim = c(-1, 1), 
       main = 'NB')

smoothScatter(logAveCount, rowMeans(originalCounts == 0), xlab = 'log average count',
              ylab = 'proportion of zeros', main = 'Zero probability versus Mean')
fit = loess(zinbPY0 ~ zinbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)
fit = loess(nbPY0 ~ nbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'green', type = 'l', lwd=2)
fit = loess(rowMeans(originalCounts == 0) ~ logAveCount)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'blue', type = 'l', lwd=2)
legend('topright', c('observed', 'zinb', 'nb'), 
   lty=1, col=c('blue', 'red', 'green'), bty='n', cex=.75)

smoothScatter(rowMeans(originalCounts == 0), exp(-zz@zeta),
              xlab = 'Observed zero probability', ylab = 'Estimated dispersion',
              main = 'ZINB: dispersion versus zero probability')
fit = loess(exp(-zz@zeta) ~ rowMeans(originalCounts == 0))
xpred = seq(0, 1, .05)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)

Zero Fraction 25%

load('./datasets/sim_allen25_fitted.rda')
zz = fittedSim[[2]][[1]][[2]][[1]]
zinbEY = log(rowMeans(computeExp(zz, model = 'zinb')))
logAveCount = log(rowMeans(originalCounts))
nbEY = log(rowMeans(computeExp(zz, model = 'nb')))
zinbPY0 = rowMeans(computeP0(zz, model = 'zinb'))
prop0 = rowMeans(originalCounts == 0)
nbPY0 = rowMeans(computeP0(zz, model = 'nb'))

MD-plot estimated vs. observed mean count, log scale, zoomed

par(mfrow=c(1,2))
plotMD(logAveCount, zinbEY, xlim = c(3, 10), ylim = c(-5, 5),
       main = 'ZINB')
plotMD(logAveCount, nbEY, xlim = c(3, 10), ylim = c(-5, 5),
       main = 'NB')

MD-plot estimated vs. observed zero probability

par(mfrow=c(1,2))
plotMD(prop0, zinbPY0, xlim = c(0, 1), ylim = c(-1, 1),
       main = 'ZINB')
plotMD(prop0, nbPY0, xlim = c(0, 1), ylim = c(-1, 1), 
       main = 'NB')

smoothScatter(logAveCount, rowMeans(originalCounts == 0), xlab = 'log average count',
              ylab = 'proportion of zeros', main = 'Zero probability versus Mean')
fit = loess(zinbPY0 ~ zinbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)
fit = loess(nbPY0 ~ nbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'green', type = 'l', lwd=2)
fit = loess(rowMeans(originalCounts == 0) ~ logAveCount)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'blue', type = 'l', lwd=2)
legend('topright', c('observed', 'zinb', 'nb'), 
   lty=1, col=c('blue', 'red', 'green'), bty='n', cex=.75)

smoothScatter(rowMeans(originalCounts == 0), exp(-zz@zeta),
              xlab = 'Observed zero probability', ylab = 'Estimated dispersion',
              main = 'ZINB: dispersion versus zero probability')
fit = loess(exp(-zz@zeta) ~ rowMeans(originalCounts == 0))
xpred = seq(0, 1, .05)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)